{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import dautil as dl\n",
    "from scipy import stats\n",
    "import numpy as np\n",
    "import math\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "from IPython.html import widgets\n",
    "from IPython.display import display\n",
    "from IPython.display import HTML"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Render the formulas referred to in this notebook. The renderer is created\n",
    "# with chapter=3 and start=17 (presumably the equation numbering -- TODO confirm).\n",
    "lr = dl.nb.LatexRenderer(chapter=3, start=17)\n",
    "# Spearman's rho written in terms of the differences d_i and the sample size n.\n",
    "lr.render(r'\\rho = {1- \\frac {6 \\sum d_i^2}{n(n^2 - 1)}}')\n",
    "# d_i is the difference between the paired values x_i and y_i\n",
    "# (presumably their ranks -- TODO confirm).\n",
    "lr.render(r'd_i = x_i - y_i')\n",
    "# sigma as a function of the sample size n (presumably a standard error\n",
    "# -- TODO confirm).\n",
    "lr.render(r'\\sigma = \\frac{ 0.6325 }{ \\sqrt{n-1} }')\n",
    "# z expressed through F(r); F presumably denotes the Fisher transformation\n",
    "# (arctanh) -- TODO confirm.\n",
    "lr.render(r'z = \\sqrt{\\frac{n-3}{1.06}}F(r)')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def get_ci(n, corr, level=0.95):\n",
    "    \"\"\"Approximate confidence interval for a Spearman rank correlation.\n",
    "\n",
    "    The interval is built on the Fisher scale: arctanh(corr) is treated as\n",
    "    approximately normal with standard error sqrt(1.06 / (n - 3)), and the\n",
    "    end points are mapped back to the correlation scale with tanh.\n",
    "\n",
    "    Args:\n",
    "        n: Number of paired observations; must be greater than 3.\n",
    "        corr: Sample correlation coefficient, between -1 and 1.\n",
    "        level: Two-sided confidence level, 0.95 by default.\n",
    "\n",
    "    Returns:\n",
    "        NumPy array [lower, upper] holding the interval end points.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If n is not greater than 3.\n",
    "    \"\"\"\n",
    "    if n <= 3:\n",
    "        raise ValueError('n must be greater than 3')\n",
    "\n",
    "    # Centre the interval on the Fisher-transformed coefficient itself, not\n",
    "    # on the z-score sqrt((n - 3)/1.06) * arctanh(corr): tanh of the z-score\n",
    "    # saturates at +/-1 for all but tiny samples, so the resulting interval\n",
    "    # would not bracket the estimate.\n",
    "    center = np.arctanh(corr)\n",
    "    se = math.sqrt(1.06/(n - 3))\n",
    "    margin = se * stats.norm.ppf((1 + level)/2)\n",
    "\n",
    "    return np.tanh(center + np.array([-1, 1]) * margin)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Weather data with missing rows dropped, grouped through groupby_yday\n",
    "# (presumably by day of year -- TODO confirm) and averaged per group.\n",
    "df = dl.ts.groupby_yday(dl.data.Weather.load().dropna()).mean()\n",
    "\n",
    "# Two dropdowns for choosing the pair of variables to correlate.\n",
    "drop1 = widgets.Dropdown(\n",
    "    options=dl.data.Weather.get_headers(),\n",
    "    selected_label='TEMP',\n",
    "    description='Variable 1',\n",
    ")\n",
    "drop2 = widgets.Dropdown(\n",
    "    options=dl.data.Weather.get_headers(),\n",
    "    selected_label='WIND_SPEED',\n",
    "    description='Variable 2',\n",
    ")\n",
    "display(drop1, drop2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Values of the two columns currently selected in the dropdowns.\n",
    "var1, var2 = (df[choice.value].values for choice in (drop1, drop2))\n",
    "stats_corr = stats.spearmanr(var1, var2)\n",
    "dl.options.set_pd_options()\n",
    "\n",
    "# Start the HTML report with a heading naming both selected variables.\n",
    "html_builder = dl.report.HTMLBuilder()\n",
    "html_builder.h1('Spearman Correlation between {0} and {1}'.format(\n",
    "    *(dl.data.Weather.get_header(choice.value) for choice in (drop1, drop2))))\n",
    "html_builder.h2('scipy.stats.spearmanr()')\n",
    "\n",
    "# One-row table: element 0 of the result goes under 'Correlation',\n",
    "# element 1 under 'p-value'.\n",
    "dfb = dl.report.DFBuilder(['Correlation', 'p-value'])\n",
    "dfb.row([stats_corr[idx] for idx in (0, 1)])\n",
    "html_builder.add_df(dfb.build())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "n = len(df.index)\n",
    "# stats_corr holds both the correlation (index 0) and the p-value (index 1);\n",
    "# the interval must be built from the coefficient alone, otherwise one of\n",
    "# the bounds ends up being derived from the p-value.\n",
    "ci = get_ci(n, stats_corr[0])\n",
    "html_builder.h2('Confidence interval')\n",
    "dfb = dl.report.DFBuilder(['2.5 percentile', '97.5 percentile'])\n",
    "dfb.row(ci)\n",
    "html_builder.add_df(dfb.build())\n",
    "\n",
    "# Spearman correlations between the columns of df.\n",
    "corr = df.corr(method='spearman')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "# The title is set through pyplot before the heatmap is drawn. Keep this\n",
    "# order: neither call is given an explicit axes, so both presumably act on\n",
    "# pyplot's implicit current axes -- TODO confirm before reordering.\n",
    "plt.title('Spearman Correlation Matrix')\n",
    "sns.heatmap(corr)\n",
    "# Last expression of the cell, so the HTML accumulated in html_builder is\n",
    "# what the cell displays.\n",
    "HTML(html_builder.html)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
